/* Start from an empty dataset and load the "pdf_info" sheet of the workbook. */
clear
import excel using "../Data/malta/pdf_info.xlsx", sheet("pdf_info") clear

/* Replace the spreadsheet-letter names of columns B-G with descriptive ones.
   Column A keeps its letter name at this point. */
rename (B C D E F G) (buyer buyer_address lot winner winbid number_of_bids)

/* clean buyer */
/* Remove line feeds and the "Official name:" label from the buyer text.
   The label is matched in three spellings:
     - plain ASCII;
     - the byte sequence the script has always matched, which is what the
       UTF-8 encoding of U+FB03 (the "ffi" ligature) looks like when it is
       decoded as cp1252;
     - the correctly decoded ligature itself, built with uchar() so that this
       do-file stays valid regardless of the encoding it is saved in. */
replace buyer = subinstr(buyer, char(10), "", .)
replace buyer = subinstr(buyer, "Official name:", "", .)
replace buyer = subinstr(buyer, "Oï¬ƒcial name:", "", .)
replace buyer = subinstr(buyer, "O" + uchar(64259) + "cial name:", "", .)
replace buyer = strtrim(buyer)

/* Strip trailing field labels that leaked into the buyer name, in the same
   order as before: "ID", "Postal address:", "Postal address", "National".
   The result is re-trimmed after every removal so that
     (a) no trailing blank is left behind, and
     (b) a label exposed by the previous removal can still be matched
         (e.g. "... National ID" -> "... National" -> "...").
   The length is recomputed from the current value of buyer on each pass,
   so no stale length variable is involved. */
foreach suffix in "ID" "Postal address:" "Postal address" "National" {
    local n = strlen("`suffix'")
    replace buyer = strtrim(substr(buyer, 1, strlen(buyer) - `n')) ///
        if strlen(buyer) >= `n' & substr(buyer, strlen(buyer) - `n' + 1, .) == "`suffix'"
}

/* Cut everything from "National registration number:" onwards and trim
   whatever whitespace preceded that label. */
replace buyer = strtrim(substr(buyer, 1, strpos(buyer, "National registration number:") - 1)) ///
    if strpos(buyer, "National registration number:") > 0

/* Drop rows that carry no information at all. */
drop if A == . & buyer == "" & buyer_address == "" & winner == "" & winbid == ""

/* clean seller */
/* Remove the "Official name:" label from the winner text. It is matched in
   plain ASCII, in the cp1252 mis-decoding of the UTF-8 "ffi" ligature
   (U+FB03) that the script has always handled, and as the correctly decoded
   ligature built with uchar(). */
replace winner = subinstr(winner, "Official name:", "", .)
replace winner = subinstr(winner, "Oï¬ƒcial name:", "", .)
replace winner = subinstr(winner, "O" + uchar(64259) + "cial name:", "", .)

/* Remove any "Fax:" label, then trim once. Removing a fixed substring does
   not depend on surrounding blanks, so a single final trim gives the same
   result as trimming before and after. */
replace winner = subinstr(winner, "Fax:", "", .)
replace winner = strtrim(winner)

/* clean winbid */
/* Keep only the text that follows the first "Value" marker, when present. */
gen value_pos = strpos(winbid, "Value")
replace winbid = substr(winbid, value_pos + strlen("Value"), .) if value_pos > 0

/* Then keep only the text that follows "contract/lot:", when present.
   The extra +1 also skips the single character right after the label
   (NOTE(review): presumably a blank -- confirm against the raw data). */
gen contract_pos = strpos(winbid, "contract/lot:")
replace winbid = substr(winbid, contract_pos + strlen("contract/lot:") + 1, .) if contract_pos > 0

/* Strip the quote, bracket and angle-bracket characters. */
replace winbid = subinstr(subinstr(subinstr(subinstr(winbid, "'", "", .), "[", "", .), "]", "", .), ">", "", .)

/* Placeholder values mean "no bid information": blank them out. */
replace winbid = "" if inlist(winbid, "No file", "None")

/* value_pos and contract_pos are intentionally left in the data here;
   they are dropped further down the script. */

/* number of bids */
/* Wording 1: keep what follows "Number of offers received".
   NOTE(review): that label is 25 characters long, so the +26 offset also
   skips the one character right after it -- confirm this matches the raw
   text (e.g. a colon) and is not dropping a digit. */
gen offers_pos = strpos(number_of_bids, "Number of offers received")
replace number_of_bids = substr(number_of_bids, offers_pos + 26, .) if offers_pos > 0
replace number_of_bids = subinstr(number_of_bids, "'>", "", .)

/* Wording 2: keep what follows "Number of tenders received"
   (26 characters, so +26 starts immediately after the label). */
gen tenders_pos = strpos(number_of_bids, "Number of tenders received")
replace number_of_bids = substr(number_of_bids, tenders_pos + 26, .) if tenders_pos > 0

/* Remove colons and square brackets, then trim surrounding blanks. */
replace number_of_bids = strtrim(subinstr(subinstr(subinstr(number_of_bids, ":", "", .), "[", "", .), "]", "", .))

/* Discard the position helpers from this section and from the winbid section. */
drop value_pos contract_pos offers_pos tenders_pos
/* Column A is the identifier used as the reshape key. */
rename A url_id

/* Normalise lot to a bare number: rows marked "No file" are treated as
   "Lot1", then the "Lot" prefix is removed and the variable is converted
   to numeric. */
replace lot = subinstr(cond(lot == "No file", "Lot1", lot), "Lot", "", .)
destring lot, replace

/* Remove exact duplicate rows, then go from one row per (url_id, lot) to one
   row per url_id with winner / winbid / number_of_bids suffixed by lot. */
duplicates drop
reshape wide winner winbid number_of_bids, i(url_id) j(lot)

save "../Data/malta/pdf_info.dta", replace

